Erin M. Buchanan
01/08/2020
#r chunk
#install.packages(c("memnet", "jsonlite", "dplyr", "tidytext", "widyr", "ggplot2", "igraph", "ggraph", "tidyr"))
library(memnet)## Loading required package: Rcpp
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:igraph':
##
## crossing
## $`70`
## [1] "cow" "chicken" "horse" "pig" "rabbit" "fox"
## [7] "wolf" "hyena" "lion" "tiger" "panther" "bear"
## [13] "elk" "moose" "reindeer" "chimp" "pony" "armadillo"
## [19] "skunk" "bear" "raccoon" "whale" "dolphin" "turtle"
## [25] "frog" "snake"
##
## $`52`
## [1] "cat" "dog" "parakeet" "leopard" "gorilla" "monkey"
## [7] "chicken" "rooster" "duck" "donkey" "horse" "cow"
## [13] "snake" "frog" "gerbil" "hamster" "squirrel" "rabbit"
## [19] "deer" "moose" "elk" "buffalo"
##
## $`49`
## [1] "chicken" "snake" "horse" "cow" "pig" "turkey"
## [7] "dog" "cat" "frog" NA "rabbit" NA
## [13] "gopher" "weasel" NA NA "opossum" "donkey"
## [19] "elephant" "giraffe" "lion" "tiger" "hyena" "deer"
## [25] "antelope" "bear" "fish" "coyote" "wolf" "fox"
## [31] "eagle" "robin" "sparrow"
##
## $`53`
## [1] "cat" "dog" "rat" "mouse" "rabbit"
## [6] "turtle" "fish" "alligator" "bear" "crocodile"
## [11] "elephant" "giraffe" "monkey" "human" "lion"
## [16] "tiger" "zebra" "hippopotamus" "orangutan"
##
## $`57`
## [1] "dog" "cat" "lizard" "bird" "whale"
## [6] "fish" "dolphin" "penguin" "gerbil" "hamster"
## [11] "rat" "armadillo" "alligator" "koala" "lion"
## [16] "tiger" "monkey" "giraffe" "elephant" "hippopotamus"
## [21] "kangaroo" "seal"
##
## $`39`
## [1] "giraffe" "elephant" "zebra" "kangaroo" "bear" "alligator"
## [7] "beaver" "dog" "cat" "rabbit" "cow" "goat"
## [13] "chicken" "sheep" "llama" "pig" "turtle" "bison"
## [19] "camel" "lion" "tiger"
##r chunk - cex controls size of text
network_plot(edg_to_adj(net_comunity), nod_cex = 2, lab_cex = 1)##r chunk - cex controls size of text
network_plot(edg_to_adj(net_threshold), nod_cex = 2, lab_cex = 1)##r chunk - cex controls size of text
network_plot(edg_to_adj(net_rw), nod_cex = 2, lab_cex = .5, lab_lwd = 1, lab_grid_size = 70)V = number of nodes E = number of edges K = degree C = Clustering coefficient L = shortest path S = Small world A = assortivity
##r chunk - can do other networks in the same way to compare
network_stats(edg_to_adj(net_comunity))## |V| |E| k C Cc L
## 91.0000000 170.0000000 1.8681319 0.2890767 14.0814358 3.6721981
## Lc S A p
## 4.1809481 3.3680006 -0.2343542 0.9780220
## |V| |E| k C Cc L
## 91.0000000 185.0000000 2.0329670 0.3829349 17.1409963 3.5095737
## Lc S A p
## 4.0615814 4.2202764 -0.1800739 0.9780220
##r chunk
#k is the number of hops
neighborhood_plot(edg_to_adj(net_comunity), k = 3, node = 'cat', nod_cex = 2, lab_cex = 1)## [1] "accessLevel" "landingPage"
## [3] "bureauCode" "issued"
## [5] "@type" "modified"
## [7] "references" "keyword"
## [9] "contactPoint" "publisher"
## [11] "identifier" "description"
## [13] "title" "programCode"
## [15] "distribution" "license"
## [17] "accrualPeriodicity" "theme"
## [19] "citation" "temporal"
## [21] "graphic-preview-description" "graphic-preview-file"
## [23] "spatial" "language"
## [25] "release-place" "creator"
## [27] "data-presentation-form" "series-name"
## [29] "issue-identification" "describedBy"
## [31] "editor" "describedByType"
## [33] "dataQuality" "rights"
## [[1]]
## [1] "international rosetta mission" "earth"
## [3] "unknown"
##
## [[2]]
## [1] "jet propulsion laboratory" "completed"
##
## [[3]]
## [1] "goddard space flight center" "completed"
##
## [[4]]
## [1] "ngda" "soils"
## [3] "biosphere" "ecological dynamics"
## [5] "earth science" "vegetation"
## [7] "atmospheric water vapor" "atmospheric radiation"
## [9] "precipitation" "ecosystems"
## [11] "atmospheric temperature" "atmosphere"
## [13] "land surface" "national geospatial data asset"
##
## [[5]]
## [1] "national geospatial data asset" "ngda"
##
## [[6]]
## [1] "near earth asteroid rendezvous" "eros"
## [1] "ROSETTA-ORBITER EARTH RPCMAG 2 EAR2 RAW V3.0"
## [2] "Sealed Planetary Return Canister (SPRC), Phase II"
## [3] "Enhanced ORCA and CLARREO Depolarizers Using AR Microstructures, Phase I"
## [4] "LINKAGES: An Individual-based Forest Ecosystem Biogeochemistry Model"
## [5] "ERBE_S4G_MFOV_NF"
## [6] "NEAR EROS RADIO SCIENCE DATA SET - EROS/ORBIT V1.0"
## [1] "This dataset contains EDITED RAW DATA of the second Earth Flyby (EAR2). The closest approach (CA) took place on November 13, 2007 at 20:57"
## [2] "Sample return missions have primary importance in future planetary missions. A basic requirement is that samples be returned in pristine, uncontaminated condition, necessitating development of a canister system capable of maintaining cleanliness and seal integrity through a variety of environments. Further development of the Sealed Planetary Return Canister (SPRC), is proposed after a successful Phase 1 program. Besides providing a high integrity seal, the canister incorporates features for robotic manipulation and to allow the sample to be accessed in a controlled manner upon return to Earth. The SPRC seal system addresses the two most significant concerns for planetary samples seal surfaces contaminated by the sample and high pressure due to the phase change of volatiles. The SPRC incorporates a novel sealing system evolved from the only marginally successful Apollo indium knife edge seal approach but with added features to address the difficulties and inconsistencies observed. The indium is contained within a protective barrier to prevent against contamination, and the knife edge is mechanically cleaned during the sealing process. The container body can be configured to accommodate a variety of samples including rock cores, rock fragments, regolith, dust, and frozen soil. Atmospheric samples can also be preserved. The design is readily scalable and adaptable to specific missions. The prototype developed in Phase 1 demonstrated a leakage rate of less than 1e-6 cc-atm/s, meeting the primary science requirement."
## [3] "Next generation Earth Science Satellites ORCA and CLARREO are designed to measure our planet's ocean and climate health. Using hyper-spectral imaging at wavelengths ranging from the UV through NIR, these instruments will record the levels of the earth's temperature rise over the course of a decade. To make such detailed measurements, polarization effects at various wavelengths due to multiple factors must be eliminated using an optical device known as a \"de-polarizer\". For the CLARREO de-polarizer, four quartz windows are needed to randomize the polarization state of the observed reflected light spectrum. Multiple reflections from 4 surfaces produce losses up to 14% of the incident light, a level high enough to produce \"ghost\" effects superimposed on the desired earth images resulting in reduced image contrast and greater measurement error. An anti-reflection (AR) treatment is needed that can withstand the radiation and temperature effects caused by the mission environment while reducing reflection losses to levels of fractions of one percent. A new type of AR treatment, being developed for many military and commercial applications, is based on surface relief microstructures fabricated directly in a window, optic, or sensor material. AR microstructures (ARMs) can suppress internal reflections to levels unattainable by conventional thin-film AR coating technology. To extend the performance benefits of ARMs to hyper-spectral imaging systems, it is proposed that the fabrication processes developed for fused silica, glass, silicon, and many other optical materials be adapted for use with the quartz and magnesium fluoride depolarizers planned for the ORCA and CLARREO missions. In addition, an investigation of innovative surface microstructure technology is proposed for the fabrication of a new type of non-scattering, micro-textured depolarizer with inherent AR properties that can be applied to multiple optical elements within a spectrometer system."
## [4] "This model product contains the source codes for version 1 of the individual-based forest ecosystem biogeochemistry model LINKAGES and two subsequent versions as well as example input and output data. LINKAGES predicts long-term structure and dynamics of forest ecosystems as constrained by nitrogen availability, climate, and soil moisture. Model simulations compare favorably to field data from different geographic areas worldwide. LINKAGES, written in FORTRAN and provided in ASCII format, simulates birth, growth, and death of all trees greater than 1.43-cm dbh. Litter fall and decomposition are also simulated. Sunlight is the driving variable. Growing season degree days, soil water availability, and AET are calculated from precipitation, temperature, soil field moisture capacity, and wilting point. Decomposition and soil N availability are calculated from organic matter quantity and carbon chemistry, evapotranspiration, and degree of canopy closure. Light availability to each tree is a function of leaf biomass of taller trees. Degree days and availabilities of light and water constrain species reproduction. These variables plus soil N constrain tree growth and carbon accumulation in biomass. Tree death probability increases with age and slow growth. Leaf, root, and woody litter are returned to the soil at the end of each year to decay the following year. Climatic and forest data for eastern North America and New South Wales are provided as example model inputs. Modelers may use their own site data within any version of LINKAGES. Example model output is also provided."
## [5] "Earth Radiation Budget Experiment (ERBE) S-4G Nonscanner, Medium Field of View (MFOV) Numerical Filter (NF) 5 degree Regional Averages in Hierarchical Data Format"
## [6] "The NEAR Eros Radio Science Data Set is a time-ordered collection of raw and partially processed data collected during the NEAR orbital mapping of the asteroid 433 Eros."
##r chunk
nasa_title <- tibble(id = metadata$dataset$identifier,
title = metadata$dataset$title)
head(nasa_title)## # A tibble: 6 x 2
## id title
## <chr> <chr>
## 1 urn:nasa:pds:context_pds3:data_set:dat… ROSETTA-ORBITER EARTH RPCMAG 2 EAR2 R…
## 2 TECHPORT_9532 Sealed Planetary Return Canister (SPR…
## 3 TECHPORT_9174 Enhanced ORCA and CLARREO Depolarizer…
## 4 C179124965-ORNL_DAAC LINKAGES: An Individual-based Forest …
## 5 C1000000743-LARC_ASDC ERBE_S4G_MFOV_NF
## 6 urn:nasa:pds:context_pds3:data_set:dat… NEAR EROS RADIO SCIENCE DATA SET - ER…
##r chunk
nasa_desc <- tibble(id = metadata$dataset$identifier,
desc = metadata$dataset$description)
head(nasa_desc)## # A tibble: 6 x 2
## id desc
## <chr> <chr>
## 1 urn:nasa:pds:context_pds3:data_set:da… "This dataset contains EDITED RAW DATA…
## 2 TECHPORT_9532 "Sample return missions have primary i…
## 3 TECHPORT_9174 "Next generation Earth Science Satelli…
## 4 C179124965-ORNL_DAAC "This model product contains the sourc…
## 5 C1000000743-LARC_ASDC "Earth Radiation Budget Experiment (ER…
## 6 urn:nasa:pds:context_pds3:data_set:da… "The NEAR Eros Radio Science Data Set …
##r chunk
nasa_keyword <- tibble(id = metadata$dataset$identifier,
keyword = metadata$dataset$keyword) %>%
unnest(keyword)
head(nasa_keyword)## # A tibble: 6 x 2
## id keyword
## <chr> <chr>
## 1 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcm… international rosetta …
## 2 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcm… earth
## 3 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcm… unknown
## 4 TECHPORT_9532 jet propulsion laborat…
## 5 TECHPORT_9532 completed
## 6 TECHPORT_9174 goddard space flight c…
unnested earlier.## [1] 24349
## [1] 24349
## [1] 101818
## Joining, by = "word"
## Joining, by = "word"
## # A tibble: 6 x 2
## id word
## <chr> <chr>
## 1 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 roset…
## 2 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 orbit…
## 3 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 earth
## 4 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 rpcmag
## 5 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 2
## 6 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 ear2
## # A tibble: 6 x 2
## id word
## <chr> <chr>
## 1 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 datas…
## 2 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 edited
## 3 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 raw
## 4 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 data
## 5 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 earth
## 6 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 flyby
## [1] 198649
## [1] 2273737
## [1] 101818
## # A tibble: 16,088 x 2
## word n
## <chr> <int>
## 1 phase 8658
## 2 data 2957
## 3 ii 2588
## 4 v1.0 1705
## 5 1 1577
## 6 system 1436
## 7 ges 1212
## 8 disc 1211
## 9 global 1126
## 10 2 1101
## # … with 16,078 more rows
## # A tibble: 54,103 x 2
## word n
## <chr> <int>
## 1 data 35586
## 2 system 16519
## 3 phase 11935
## 4 space 9859
## 5 nasa 8845
## 6 based 8835
## 7 2 8469
## 8 1 7969
## 9 design 7604
## 10 time 7586
## # … with 54,093 more rows
## # A tibble: 6,860 x 2
## # Groups: keyword [6,860]
## keyword n
## <chr> <int>
## 1 completed 9021
## 2 national geospatial data asset 7262
## 3 ngda 7262
## 4 earth science 6948
## 5 active 2885
## 6 atmosphere 2734
## 7 land surface 1673
## 8 goddard space flight center 1537
## 9 glenn research center 1535
## 10 langley research center 1390
## # … with 6,850 more rows
##r chunk
nasa_title <- nasa_title[-grep("[0-9]", nasa_title$word), ]
nasa_title %>%
count(word, sort = TRUE)## # A tibble: 12,665 x 2
## word n
## <chr> <int>
## 1 phase 8658
## 2 data 2957
## 3 ii 2588
## 4 system 1436
## 5 ges 1212
## 6 disc 1211
## 7 global 1126
## 8 space 1019
## 9 based 973
## 10 version 861
## # … with 12,655 more rows
## # A tibble: 44,792 x 2
## word n
## <chr> <int>
## 1 data 35586
## 2 system 16519
## 3 phase 11935
## 4 space 9859
## 5 nasa 8845
## 6 based 8835
## 7 design 7604
## 8 time 7586
## 9 systems 7486
## 10 surface 7456
## # … with 44,782 more rows
##r chunk
title_word_pairs <- nasa_title %>%
pairwise_count(word, id, sort = TRUE, upper = FALSE)
head(title_word_pairs)## # A tibble: 6 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 phase ii 2498
## 2 ges disc 1211
## 3 phase system 948
## 4 phase space 637
## 5 phase based 601
## 6 ges degree 589
##r chunk
desc_word_pairs <- nasa_desc %>%
pairwise_count(word, id, sort = TRUE, upper = FALSE)
head(desc_word_pairs)## # A tibble: 6 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 data set 4139
## 2 data system 3305
## 3 data time 3144
## 4 phase ii 3063
## 5 data nasa 2730
## 6 system nasa 2728
##r chunk
keyword_pairs <- nasa_keyword %>%
pairwise_count(keyword, id, sort = TRUE, upper = FALSE)
head(keyword_pairs)## # A tibble: 6 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 ngda national geospatial data asset 7262
## 2 ngda earth science 6857
## 3 earth science national geospatial data asset 6857
## 4 earth science atmosphere 2731
## 5 ngda atmosphere 2721
## 6 atmosphere national geospatial data asset 2721
##r chunk
set.seed(52550)
title_word_pairs %>%
filter(n >= 250) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") + #use ?ggraph to see all the options
geom_edge_link(aes(edge_alpha = n, edge_width = n), edge_colour = "purple") +
geom_node_point(size = 5) +
geom_node_text(aes(label = name), repel = TRUE,
point.padding = unit(0.2, "lines")) +
theme_void()##r chunk
desc_word_pairs %>%
filter(n >= 2500) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") + #use ?ggraph to see all the options
geom_edge_link(aes(edge_alpha = n, edge_width = n), edge_colour = "purple") +
geom_node_point(size = 5) +
geom_node_text(aes(label = name), repel = TRUE,
point.padding = unit(0.2, "lines")) +
theme_void()##r chunk
keyword_pairs %>%
filter(n >= 1000) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") + #use ?ggraph to see all the options
geom_edge_link(aes(edge_alpha = n, edge_width = n), edge_colour = "purple") +
geom_node_point(size = 5) +
geom_node_text(aes(label = name), repel = TRUE,
point.padding = unit(0.2, "lines")) +
theme_void()##r chunk
keyword_cors <- nasa_keyword %>%
group_by(keyword) %>%
filter(n() >= 50) %>%
pairwise_cor(keyword, id, sort = TRUE, upper = FALSE)
head(keyword_cors)## # A tibble: 6 x 3
## item1 item2 correlation
## <chr> <chr> <dbl>
## 1 ngda national geospatial data asset 1
## 2 dashlink ames 1
## 3 knowledge sharing 1.00
## 4 schedule expedition 1.00
## 5 turbulence models 0.997
## 6 appel knowledge 0.997
##r chunk
keyword_cors %>%
filter(correlation > .6) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation, edge_width = correlation), edge_colour = "purple") +
geom_node_point(size = 5) +
geom_node_text(aes(label = name), repel = TRUE,
point.padding = unit(0.2, "lines")) +
theme_void()